{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from concurrent.futures import ProcessPoolExecutor\n",
    "from pathlib import Path\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import torch\n",
    "\n",
    "from lhotse import CutSet, Fbank, LilcomFilesWriter, WavAugmenter\n",
    "from lhotse.dataset import SpeechRecognitionDataset\n",
    "from lhotse.recipes.librispeech import download_and_untar, prepare_librispeech"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Settings for paths"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "root_dir = Path('data')\n",
    "corpus_dir = root_dir / 'LibriSpeech'\n",
    "output_dir = root_dir / 'mini_librispeech_nb'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Download and untar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "download_and_untar(root_dir)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Prepare audio and supervision manifests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "mini_librispeech_manifests = prepare_librispeech(corpus_dir, output_dir=output_dir)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# [Optional] Data augmentation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "use_data_augmentation = False\n",
    "augmenter = WavAugmenter.create_predefined('pitch_reverb_tdrop', sampling_rate=16000) if use_data_augmentation else None\n",
    "# It seems when spawning multiple Python subprocesses with the same sox handle it raises \"std::runtime_error: Couldn't close file\"\n",
    "# The issue seems to happen only in a Jupyter notebook on Mac OS X, hence the work around below.\n",
    "num_jobs = 1 if use_data_augmentation else os.cpu_count()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Extract features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "for partition, manifests in mini_librispeech_manifests.items():\n",
    "    with LilcomFilesWriter(f'{output_dir}/feats_{partition}') as storage, ProcessPoolExecutor(num_jobs) as ex:\n",
    "        cut_set = CutSet.from_manifests(\n",
    "            recordings=manifests['recordings'],\n",
    "            supervisions=manifests['supervisions']\n",
    "        ).compute_and_store_features(\n",
    "            extractor=Fbank(),\n",
    "            storage=storage,\n",
    "            augmenter=augmenter,\n",
    "            executor=ex\n",
    "        ).pad()\n",
    "    mini_librispeech_manifests[partition]['cuts'] = cut_set\n",
    "    cut_set.to_json(output_dir / f'cuts_{partition}.json.gz')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Make PyTorch Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "cuts_dev = SpeechRecognitionDataset(mini_librispeech_manifests['dev-clean-2']['cuts'])\n",
    "cuts_train = SpeechRecognitionDataset(mini_librispeech_manifests['train-clean-5']['cuts'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Illustration of an example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Transcript: IF THE READER WILL EXCUSE ME I WILL SAY NOTHING OF MY ANTECEDENTS NOR OF THE CIRCUMSTANCES WHICH LED ME TO LEAVE MY NATIVE COUNTRY THE NARRATIVE WOULD BE TEDIOUS TO HIM AND PAINFUL TO MYSELF\n",
      "Supervisions mask: tensor([1., 1., 1.,  ..., 0., 0., 0.], dtype=torch.float64)\n",
      "Feature matrix:\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA54AAAAvCAYAAACL8RWqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO2df4xk2VXfP+fe96peV3X11HRP78zOes3aZLFlbMkYRMgvRIjCr0RxIqHEkZJAEomIgET+IJJJ/gASpCRESSQiEmSEJYMgxoIgEEkIECyFf2IMjsH2YrM/MuvdmZ2d3p6t6eqqflXv3Xvyx73vdc94Z3caM7Oe2fORSl396lXVffee++p+7zn3XFFVDMMwDMMwDMMwDONu4V7vAhiGYRiGYRiGYRgPNiY8DcMwDMMwDMMwjLuKCU/DMAzDMAzDMAzjrmLC0zAMwzAMwzAMw7irmPA0DMMwDMMwDMMw7iomPA3DMAzDMAzDMIy7yj0TniLyLSLyORF5SkTef6++1/jSRkQuicinROSTIvK7+di2iPyGiDyZ/57Nx0VEfizb0B+IyHte39Ib9wIR+aCIXBORT584dmobEZHvyOc/KSLf8Xpci3FvuI3N/JCIXM73mk+KyLedeO0Hss18TkS++cRx+916gyAij4rIR0XkCRH5jIh8Xz5u9xrjC3gVe7H7jPGKiEglIr8jIr+fbeaH8/G3iMjHcvv/vIgM8vFh/v+p/PpjJz7rFW3pvkBV7/oD8MDTwFuBAfD7wDvuxXfb40v7AVwCzt1y7EeB9+fn7wf+TX7+bcD/AAT4OuBjr3f57XFPbOTrgfcAn/7j2giwDTyT/57Nz8++3tdmj3tqMz8EfP8rnPuO/Js0BN6Sf6u8/W69sR7Aw8B78vMJ8EfZNuxeY4/T2IvdZ+xxO5sRYDM/L4GP5XvHR4D35eM/AXx3fv6PgZ/Iz98H/Pyr2dLrfX13+rhXHs+vBZ5S1WdUdQ18GHjvPfpu4/7jvcCH8vMPAX/9xPGf1sT/AaYi8vDrUUDj3qGq/xu4fsvh09rINwO/oarXVfVl4DeAb7n7pTdeD25jM7fjvcCHVXWlqv8PeIr0m2W/W28gVPUFVf1Efj4H/hB4BLvXGK/Aq9jL7bD7zBucfK84zP+W+aHANwK/kI/feo/p7j2/APwlERFub0v3BfdKeD4CPHfi/+d59Q5qvHFQ4NdF5PdE5LvysfOq+kJ+fhU4n5+bHRkdp7URsx0D4HtzWOQHu5BJzGaMW8ghbV9F8kjYvcZ4VW6xF7D7jHEbRMSLyCeBa6RJqaeBmaq2+ZST7d/bRn79BrDDfW4zllzIeL3586r6HuBbge8Rka8/+aKmuAJ9XUpm3BeYjRh3yH8Gvhx4N/AC8O9e3+IYX4qIyCbwi8A/UdWDk6/Zvca4lVewF7vPGLdFVYOqvht4E8lL+fbXuUj3nHslPC8Dj574/035mPEGR1Uv57/XgF8idcQXuxDa/PdaPt3syOg4rY2Y7bzBUdUX849+BH6S49AksxkDABEpSSLiZ1X1v+bDdq8xXpFXshe7zxh3gqrOgI8Cf4YUpl/kl062f28b+fUzwD73uc3cK+H5ceDxnLlpQFok+yv36LuNL1FEZCwik+458E3Ap0m20WUC/A7gl/PzXwH+Xs4m+HXAjRMhUMYbi9PayP8EvklEzubQp2/Kx4w3CLesB/8bpHsNJJt5X84g+BbgceB3sN+tNxR57dRPAX+oqv/+xEt2rzG+gNvZi91njNshIrsiMs3PN4C/TFob/FHg2/Npt95junvPtwO/laMubmdL9wXFa5/yxaOqrYh8L+nm64EPqupn7sV3G1/SnAd+Kd2/KYCfU9VfE5GPAx8RkX8IPAv8zXz+fydlEnwKWAJ//94X2bjXiMh/Ab4BOCcizwM/CPxrTmEjqnpdRP4l6Uce4F+o6p0mnzHuM25jM98gIu8mhUpeAv4RgKp+RkQ+AjwBtMD3qGrIn2O/W28c/hzwd4FP5TVYAP8Mu9cYr8zt7OVv233GuA0PAx8SEU9y/H1EVX9VRJ4APiwiPwL8X9KEBvnvz4jIU6Rkee+DV7el+wFJ4tkwDMMwDMMwDMMw7g6WXMgwDMMwDMMwDMO4q5jwNAzDMAzDMAzDMO4qJjwNwzAMwzAMwzCMu4oJT8MwDMMwDMMwDOOu8kUJTxH5FhH5nIg8JSLv/5MqlGEYhmEYhmEYhvHg8McWnjkd8I8D3wq8g5RC+h2v8Z7v+uN+n/HGxGzGOC1mM8ZpMZsxToPZi3FazGaM0/Kg2swX4/H8WuAiaaPT3wF2gPe+xnseyEo07ipmM8ZpMZsxTovZjHEazF6M02I2Y5yWB9JmXlN4isgHReSaiHz6xLFt4CeAIbAP/EXgB4BH7lZBDcMwDMMwDMMwjPsTUdVXP0Hk64FD4KdV9Z352I+SvJ3vBf4DSYB+GvjTqvq9t7z/u8iqvSjlq9/0thEoKIICgqIIks8/WRpBGUjLhgTWWSMrMCCiQK0FlbR4Se8/UqEkslZPwDGQloFAd40igqqy1IJCIg7Fo7QITX7PSBoERfJ3xVyyWksUGElDJJU/5vfJiXKrCl4iEcmfH1lpgQAbbo0itOrSa/k8j+LyJ6TrOC6fAo16vEQGRADa/J6IcKQlDsURKST29RgRBMUBEVjl8hcEYq6bm8qNEFTwko606hCgJZW1kIDPZx9piaqAkFvxC1EVRJSCmM4RCPm6XS6noOl7BIp8bS7Xfdcms+uR7R3p66JA+/rv2qlrh1JCbyPH368EhLUWDKWhVc9QWgJCq/5EGVN5FjqgICACPh9facGGNLQ4GvUUEogqDCXQ4vp6brMdBxVaUnk8MduYI6jry3ikJQNpCeooJFJryUjWhNy2Ky36duxsritv13/I7RPV4bK9dG3Z10Oup669VKVvs3iiDk++r+uTQ9dQEajxCMqQSMzv6x4VgQahJPUrgf4alzqgkqav30MdMJIGgMBxmwL4/O3rbG8A61zvAE3f/6W3la4NHREvStB0NbPrkXM7x5+rwDr3odQ+jhZHSaDIfTAioOAknR/U4XOdBhyedF5QR1fTBQEnx/av+fVUNndT3+jqv6/fV/h//zODV+xLxt2nYUXJ8PUuhnGfYPZinBazGeO03O82M+fll1R199bjxR289zuBvwZsnjj2D0jDpjHwTwEP/Crw8Vf7oDO7JT/4i+/Ci1LHsheHay0YSEsdSyrXENQRcIzdisfKPd45WPFU4/EotRa8rVyxF5VLzZQvL1/mnPeUeP6oUc75hithyJX2LI8V+5zzDUsVlrFg4hoCwidXF3nIz9n2S0oie3HEleYsAce7h8+z6yNBlVl03Iip0a+FCV4i7xq8RKNQq2cvjLkWJgwksFZPHUsGEnASifkaLhQznmt2KCXwV8fPcz0E9uKQsbTU6nvx3KjnvD9i7ITn2pJKAmdcYKnCM802lTS8tTzAAc+2I95WHhGBZ5oKJ0ncbLs1ZR7MeiDker8RPZfas8zCmF1/wDxu8Gi5z4DIQkt2/RGVKEGhVsdcS+axotaSRRziUaZ+QSUNHuVSc46Ao5SWZRz2AitkcdA990QeLfeppKXWY1PzKCOXPmuhRRLWWfRN3RGlRGaxolHP5eYsXzF4kVpL1up5pDhgP26kzy6WBIWlep5sdpm6JZU0OIk06pnHDbb9IbWWXGnO8o7hC1wPIx4tDqjVc7ndYiCBi8W8v/6Prx5J1ymR9wxeIgCfWD3Em4uXWWjJLI6opGEsa877o76+ByJcagd4lMvtFICH/Jzz/ohKoFa4EkY8Xh4RVPnY6gKPl3s82exywd/g6eYh3j54gXlM7Xk9bOKIzOMG7xk+x404ZKEDdtySkWu5HipmccRaPXvtFmO3onINpbQ0WrBWD9DbYTepEFRotMBLFm/5dU/MQivS5Lb6yuFl3lrWfLYZM5Y1bysj89iyVFirY6WegDCPFW8rD/jU+iy1lnzl4Frfzx4pXu6v+RPrc+z6OWNpadRxsWhxQK3KxKXy/uF60Ivvy+1ZHitf4rxf82IY9HWz65Z4UeaxZC9McEQeyW36dLNLJQ2PldeZxYrHi0NqhWfaM9nuVszCiIb0fVO3ZOLWXGnPUErLjlviRLnSnmHkVlTSMI8VjxU3CKT7yNPNLgFh7FaMZU2tJZU0uS42+ntbUDm2+Vz33fOTEyVNFs8/+/Y3vdrt0zAMwzAM477gN/UXnn2l43eyxvO/AVeAgYh8RkS+jyRCfwxYAVdJIvRbgcu3vllVP6CqX6OqX3Nmu+g9aqW0+Oy5AKhjiRNlrQW1lkQ99kIuY2ChA9Y4ai2ZaySosNQh81gyj4GGwBnX0CjMY5W9nmmgV4kylMDECbtO2PGHjNwqfUeugrV6GvVUvbcmlWuN7weWU7ekBEYCExcYuVUSw7Ek6vHndB4o6Lx8qbybMmTsJFe8suvX6dqz+Jw6x6aUNOo54wJTV7DtoJKGias54zyVHA9m16qs8TzX7LAfxkydY5LPKUWoRJg4z66PTFxNJQ1bruZCMWNAJCAEHNsORiJUAgOJTN2asaRHEi55wIxSa4mTdD11LPM1ptebLHZqzcdxNFpw0a95tFiy7WqmbsUZt2LqWhzKMg4ZS8OuP6KU1M5RhR131HuPai0ZuRVjt2IeB/33lMDYCZUEquwZKyUwlpaJW+NIEwCdEPAou37BQgsWWjB2K0ppmTqYiGPq0mRHJygqcUxdwUACI9cyliYLkQ1GrknnS6rzoElMLHTAQEIScRJzmxSMXRJTQ3GMnGfijpi4QCntTf2llJZH/GESt1n4DCRmca+5npN9TdwRW67GS2Tij3rR2eGzd9lnjyDQl22tBet8bjdRsNaCSBaqOCZuzUg8U1fjUTZdhRdhKDB2kYtFmlC4WMzxIjR4Io5aPUEl12+ajpi4op+4CAgj17IpJZUke62koJKCiVsfl18iI2kZi2MkLbt58mPiIiNRKgk4IpVrGElg6tYMJHn0owq7bskoC1qAkVv1tuNRxrLG56iH7VznE9dQ5r7hif2kyFBSvS809c+uv5fSUmYPeqMFQR2LODxxDXrT84EEgiYPfNAUZVBKuOk8wzAMwzCMB5E78XheIo3xIQnVf04Smt9JiuJ8OH/OdeChW998MtR25+KAOpb9wBcAhVVMYgZNA+CYw9mcdp4BYUCgUc9aPQ6o8sB72zc87EccxJoXwwZvK1fZo1VyNWwRmBPVsdABK10ycQ2X1ru8q3qOgFDHgmUcEvPAexYHDCV5DussfrvBfPe38/hUEljEAZVrmIUxY7fKg841TRziJbIXtvr3HWkaVM9jxZ5OmLiaZRxSScMBjrke4iQAnhvRE2lZRMdBrAC4Hg6YZAFTqybPayx7b+ReVLadciMqa3WMXWREZB47D7OwH8fUccCOPwRSuGKtKXw1ZE8uJMG90AGBTjg7FgwoJdBkgdLRXZ8jsohDBtKyiEO23FESairMYgojnLo1lShlbkOAhZYsQtnbxgFDLrgFjxQHyfPXiXgcV7N3r9GCpS5xqsziINWRg1Lb/JkDai0ZaKDOYmEeBziJzHN9dgL5ubbNgkNT+2Wxv9CI1/TdszhgFka9x/NqO6GSGZ0RTySVr47JM1trya6fsxeVibTMVXoB37GISZgvdJBECEIlLaWkupyFcZpoiSUe5bw/ZOJSuGuIQlTHXhyx1oJ52CDgmPoFizjMfQicxFyeQe/VXJ8Qp13UQROTB24eNnqBth83GIVD9sIZKmlYacOmlHgnzOO6nzZ6rt2C4oA6TwostGCcBXVUR63KPLbMwpiJqxkRmcUBnlXvMY5xzRk3yFEAZXrEkhfDJlN3QMTj0Cwuj+txkL3t17N9ldn2Ri55VRuN/Xc0+bOvtmeIOB4t95NXPwxotGDijlhoQZmvbBGHKQIDYanCSAK4FQt/yNX2DCWpL3QTAnNN9uvzxEwSwJ2JdLa+QSVrKln3faiOaRLEMAzDMAzjQeZOhOcV4N8C/4mUyfYFYA28mZRY6H8BXwdcIK3zvC0O5UI56z0DXUhdwLGKZb/maRmTwBmSBN2NeAMghWxKyUCEJq/bDAqfb5d4Sd6uoRRcasa9ZyWqo8oeiZFrk7fSHzGW5HFboVwsbnCtnTCQlv0wJiBMpMGh7Lgl+3HEftik8g3PhSHLOOxDGSvX5MFnzB7TdR/KWEpg6hdcjdPsGfE4PfbOnfSSVhIJCldapcxhtttuQNB17/EcO+H5tmCNZ9B7KJOHtpTARJR5TEIgIIzjmm1fM3XClquZxw12/Rx8Wic7ixuUErgShn09jVxLJUolh1xS34cQLnXIVJZ9mU+GaAL9NXti72GstezDCbvrXcaC0jVUTohdG+KYupqr7ZSpX+S2VjzHoYgDksCY+kXvCXWAFzjv18zjvK+Hi35NrevsRY99WziJjKVlpo6H/CFXw1ZeW9my69Iqz6lb9oLg2XaER5mFEReLG+z4RS9Wx25FUGEvjtLF+QUDAjiYtxu9J20iipe0frCSlmfbZOR77RbvGhzgmth7vMbZTifieiHSBM8i1+OQwPVYcD2MWOiAedhgFkY40T4stHJNH/65zpMmY7dmxx3SaEEd02eV0qYJmRwq7VwKsU3eu9S2u27JxAkX/CGlRJ5vlW3nGFJQiafJYmrqjpg6qNwarwUDItsu8HRe0/p8u4HL61hnYUTtmpuiBLq+PXcrSklCriT0dV0r7IVxLyofK2eMRJm4wCy2qZ9lT+le0CTIwwa1lgQWvb02FMnLmW0r2WzyYgfNHnGXJglKQgrHjamPTURZ5omZBp/CZfHZi5vqbiQrljrs15x2otMRqXXAQNp+cmrsVkRN5YoqN3lJDcMwDMMwHkTuRHi+DHw/MAB+j5RI6NeBbwamwN+CnL9D9ddufbOqfgD4AMCXvXOie+0WALMwumltYEo2kkTnyKUkK8lzNmAWk/dqFkcEhFmEicA8blDJDV4MJWsc18MmC11xoZjzTHOObX+YhRfshch573i+pRdTe2HMQgd5oJ48Qg0p5HZOEkqX27O9QO7WNJ4c1Hfevi5kshMtQVMymT3dYh6rXmyecVUSJK6mksA8X1ujnnn+rEvNOUq5xkBSOCfAfhgzkhssNA3Y94OkkEO3YpkHraUIsygcxCqLQGUZWxqNzOKIWRhRuXXv3WlyuF+ULIDxLELZr8nsPE/kutjXzSQuerGZBuH9WrXo+7W5qTyBp5td/kJ1mXlOCuREmTqHEwEXOe8PaXJoZCnJK1ZKy9wdMct1PQtjdsolBzpkP2z2HqYb8RAnyjKWfL7ZZqc45FozYcC1vvwxOiauZq/d4oJfcandTOvwtOzXAT5aOGoN3IiaQ00F8uSIl5g9rROmfsmOP2QvbDFxa+Z67KXtPLq9MO/DvB17MXll98KEHZeE0EHc4EqbPMdNDvOex0ESYJrWds5CErV7YYugrvdSd7bXrant1rPiYBlOTIrk9YmVNDTZI9pNkNSxPA471eQFXcWSoWtYxRTafD1WQM1n1+fxEnm83GOugYamXwO9iEP2dZOJ26OShjoO2PYNz7YbDCQwj1XqPzl6oVtbGnGsc1/r6supMo8DZnHU96d17hfdNXmJLGNBkMDqRNjs1TCi0SL37W5dq+v715are2+1J4n9FGouXPRHTNyaWn3vnZ/FEQMJlBJyqLdngrDWlrGsCS59fh1LIi7VcRaiB1R5bXS8qY0ge1GlORH+fbzcwDAMwzAM40HmToTnh4DHSf7IP5WP/STwbaQcNhFuiSE8wclQ27MPV8zDBpBESVTHUlNClkY9jfh+QAgnBmk5HNHnUMhaPfPo2PaHvBjKE57Nhl23pMHx1vIlavVMHcxi8rY+3wqNOp5rdpj6JWv1lASuhQlRhZUOkweI0As3L5E6DiF7W7p1gSELhsvNWebZc9hkD+FJT18lDVO/pI4lN2LKzfumQlnm9V3zuJEGqRKppKGUwI4/JKqwVqXuM5A6mrxm0klkriWrEPs1l5UElqp44cR6xzR47wbYSeAWfd12iYEqn85HXe9BrbVM4ZtdghRxjGRFUNeHkjbqGbl1Xo+bylVKYJ0TBo3digvFjLkKjxYNQdcEYJ7SrTKLRZ9EqPvOqEm41nl9b9BUz+scxtqFkJY503GVvaJelP12k53ikAMd9mGVXf1W0nA9HCdN6iYeHi0OmGXXVK2ea+2ExwYvMcget8UtExLdpMMsVsnDSfL2LuMwhTKHTWKuo1IiC03dw6N9+yZ7lf6aHEn4HcSKM8UNgqb26bxgKVlS0Zcb6IVYKYE6luy1k743d2tv5ySx2Xk1O9HZtVXUlCBqHjb6ZDerWPZt4XOm2h1/SJkjBFJiLVjGklqLfk3lMh7fSp5szvQTBmO3whFzAq6i9yymMPeyt+00SaK9fc7CuH+tC//urhdgmc/bC1s5IiCtcb3aTnMCJZ/7VNuvTd7PCZu6hFeXmnPg4GoY9/ePoNJPbKTvGeYw48hQHBMXGcgBTzeevbCF95E6DliLz3002eIqJBEPEFVuEpjd2nVPZKnHk1KGYRiGYRgPMneSXOh9wFeTtlS5QgqzfQvJy/ndpKy2z3EnIlY0J0DxrPKaw26Q1w2Eu8Qdqywypn55U8KRbk1VlQfcTpRSYj8gH7vI1KWMsQNSdtrdEwl91rheJDY5cVDnfXASj0McsxBwecB4nEwkJXkZEHL46zqL6CR+X2onaUCZPWCdgEiCJXn9UobONOjuBp0HoWLXHzEgci1MgKTqa5Xek7Lt2uQhVM+jfsXFIgnLeax4pjlH0JTo5yF/SOUaJm7Ntm+44AMlSdA60trYzpPnJfaJhDphVJ4QwZ2wCCos45BBPta10zIO+rrrPHfdtQd1XG7OUqtnrV1ym5T0qNE0IF/GYfLWcZyBdZFFXKrHYw9eg89JcNL37LgVu67N2YBTVleALVnl8M9l793uhGqXDKpLdHM1C5ypK/oEMXvtFtfCBCeRHbdIWWNlzbare5HZCbWYs/tO3VGeQDnuBvM4wENKXpSzNadESMmTVmVv2jIOiSpUrksklMJWK9f0EyCdnXR/BxIYuxVTv8ze+NQmnejqQp0hRRFEPd4WqFsze5JSAsu8RjPk7WkqaRlJy0Gs0lplHIt8fRPXcMat+j7iJIk7L5GpO2LXz1nktttyNVO37IX7Qgd9KGvMa4iTeFwzcXWOFkgJeBwpPPohf8iuXzCWNSPXMnUpMdA6Z4StpM3rY1u8pIiC1IdSgqXOPpc6ZBZHvRhtNIXfAjlSQBlJy5ar+0zVAaFEWMaQvPaQ+1eKGljEwU2ZhOeh6uu4UZ/vUYFBngiqtewFfuehPQzVF7SJYRiGYRjGg8SdeDx/jrRf5wYpm+0B8Ffya/8K+G3go8Dfec1Pyp6xkVv1HpXSNTcJlUZ97yno/l7wgedy8k9P5KJXGpRZ7LKjwtS1PLHe4ZOrh4g4pm7JbnFAJY5aY97SwvVCt/MmpT0tQx9mOHE1U1dTEmlwPNdO8cR+H0CgH8QehFG/rcIsjPIgM2XZJB6HDcf8vSuFGzGFcnbJVzqv5JavqUTBtX1I5ZUwoJTkUcElIepFmboj5tqtG2wYuOQF2stbn6RMnDFngHW953av3eLx4VWm7ojn2o3eC9OJkS1ZMczrDWPeUqSrq+thk+3isN8jtZQAEjgMFaUPvTDvM35KYOqXTP0ie5NJCZtiTmSE4CR5vrb9IRf8gsvNWbxo9vgez4mk9XLp/7Fbsd9uZg+aY5HXyI1kxX67k7ez0X6D0lrLXiAA/VYp3XOApQpNaBln5+5On+F0TZlFSzo/MnVLFm7Ye+267VvWuD4pT5/hFMdeLHKbOCrX5Eysqd3H0vah0p2YvRw2ecQfUpKEZRcSW2vJhWLGxK2ztzF58K62Z6i1YNfPUxbfXI4676GZyt3mJFHaTw6c8Ues1OVw8SSih3mLm+vtmDPFkm2fskR32Wm766jxlESqPEHR4LnoA/O4ZB43OO/XNApbOevubr9ut0n9LV/bOEczdJ5cR5pESt7yNct2yFYOSV9owSKWTHMm6ZHAWkKaXHBNX++VNMx01CeQcqJ41T68Nnl/Pa5Q5qHKmYMDpV9yPYyYx7KfeFkwZB6HXCxfZi9qvyVQrb5vs2RfdboGFWY67sNrO09nJzK7e0eqs6IPkd/0dR/ubhiGYRiG8aByJ8Lzx0lez0AKqa2AXyMlFBqQ1noeQc7icQsn13i++Z1b6iRyvd2kdC2rvC3EMgz7ELqhazgMFUNpOQwVDxUH7Adhxy3Yj2PW6rkeYxZvDUHpA30fLWZ9GONQkndiFiP1iT0HO09a52WDJKw6j+s0e0pi50GVNXu6xYXiBo0W/YC220h+6pZ5TVpgWi55dnWOWRgxcTWu36+v21vxeHBZ51DMMmfGHbs1i+iISPL6nNjC5EIxYy9sMVfphWSTP2tAxLmTCXyEul9fmLzLC1J45XaRtpGZxY0+OVAdS0qfyldTcKDDfguVSfbi7bUTNn2dsqfmfQpXOUnNKHt8G6DJA+yuLpPwb1jjuJxDULvXO1E0cXUOa03erxvtBpU0XChm/TX5vL3F1C25mvfIrDVlPO0mBLpQWEiJaMKJuo6k9ZGdwKmkSYN+PycgXM8eqkoCjw+v9ttspLWejkfKl5N3LHtOp37RTz6kbXlc75ntwqoHXQhs3o4DUqbdy8CAwIXiBns57LwLTQZYxiF7dJ7mhso3XGvTXpUBx/W87rPMiWp83jJmr5307X3GJ+9izNvldKHhLzWTPtHS5fVZNn2dRVOBQzgMFZs+bc/SqGcRHV60F44nbSx9szCLI3b8ITeiJo+jP+RGTH3tsfKl1FcQStKWL2v1LOKQC/4gX0foz7ne9a1sl40W7Mcx3bY/szDqw907cdet3ayz6E4TQWueXF1g6pdpPWZ+f7/9jwiXmm0q16Q1qnGjz3wdouRERJ5FnhDba7eSGJZ1ul/FIQex4iCmEOUuE7CXyCBvE9WFNE/8EXUo+77cZexOyc7SpMNJYWoYhmEYhvGgcifCs+MJksT7CuDP5vc+mV97iNsIz5NrPKcPV8n7l8MHS9fctD9hGox1oT7BVf8AAAOUSURBVKvJq3a1OcNBqJj642yjv7d6JIe4DfvsosvsmeiEUTfA7jZnb7Sgck1a05aT79Q5iUola5akrTaeWD2cxcxxeGOXCKnzRJ5kqUNmYcyWO6LWkrdVL9CoZz9sMnFHJG0OQ2l4Yn0hb2fh+8Q+U79kIIHKrfl4/WZ8Dkce5D0Kay15++BFFnHFp1YP9wlnurDSoJKzdCZPaxe2GHHMOfaodudeD5vJm5MH1XVe1+pz1tFS2hTSqq5PLjSPVdoGo98uJnlpIHl1V7Fk4o9YxZJzxQHLOMSJ8tnVRbbcUZ9FtSPmtZuduO7WmqaMqy37YfOmhE6LmJIKdYl4IK/xPOGFhrRPYylt702vteyTvHw+bDN2677+I47Pt9t9oqW0BYbr15l2NtBkofT44CqfqN/UH/ekxE5dO3XX1dXPrUljuuuJ6lhLshuvx+Hl58sbva0FpBfOkST+6pgywnaJbLqstss4TGIxe/TmsUr2Tcq62n2Gk8gkt5mTyLncJjfCKAvVgrPFou97I7fimXabOg76Okt7UKZ2m4eNtFcpkdqV/PbRWwl5zWiX4XjiarotkLyk7MApdP6IWdxgP2zeZOuQwqnTxFDVrz/1aP/6UocQ6dt9rhVlXmubN5Ch1pKpX+IkJdXqQmC77NlD56g1ic4df9jXV3cf6u4NJxM4pb5S5rXJoxzFEKncuk9W5TWFHA+kTdcvqV12iznX202GrmEgLWstGEib1/u6E9swGYZhGIZhPLiI6qtvXC4iu0CjqjMR2QA+C/wmsAX8oqp+WER+BXiLqr7rNT5rDnzuT6boxhuEc8BLr3chjPsKsxnjtJjNGKfB7MU4LWYzxmm5323my1R199aDd+LxfAfwHyVt2t6ll/wlYC8f/xFSZttfvoPP+pyqfs2dldcwQER+12zGOA1mM8ZpMZsxToPZi3FazGaM0/Kg2syd7uMZSKJTgZ9S1V8Vkd8iJRyqgU8CP3zXSmkYhmEYhmEYhmHct7ym8FTVPwC+6hWOf+NdKZFhGIZhGIZhGIbxQHGvc/h/4B5/n3H/YzZjnBazGeO0mM0Yp8HsxTgtZjPGaXkgbeY1kwsZhmEYhmEYhmEYxheD7VpuGIZhGIZhGIZh3FVMeBqGYRiGYRiGYRh3FROehmEYhmEYhmEYxl3FhKdhGIZhGIZhGIZxVzHhaRiGYRiGYRiGYdxVTHgahmEYhmEYhmEYd5X/D11hS/nLh3tMAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 1152x144 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "sample = cuts_dev[0]\n",
    "print('Transcript:', sample['text'])\n",
    "print('Supervisions mask:', sample['supervisions_mask'])\n",
    "print('Feature matrix:')\n",
    "plt.matshow(sample['features'].transpose(0, 1).flip(0));"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Make PyTorch Dataloader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_dl = torch.utils.data.DataLoader(cuts_train, batch_size=16, shuffle=True)\n",
    "dev_dl = torch.utils.data.DataLoader(cuts_dev, batch_size=32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "features - shape or length: torch.Size([16, 1727, 40])\n",
      "text - shape or length: 16\n",
      "supervisions_mask - shape or length: torch.Size([16, 1727])\n"
     ]
    }
   ],
   "source": [
    "for batch in train_dl:\n",
    "    for key, value in batch.items():\n",
    "        print(key, '- shape or length:', end=' ')\n",
    "        if isinstance(value, torch.Tensor):\n",
    "            print(value.shape)\n",
    "        else:\n",
    "            print(len(value))\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
